#############################################################
### Geographic Representativeness of Legislatures Project ###
###                                                       ###
###           Leonardo Carella, Andrew Eggers             ###
#############################################################

# This script replicates the within-case analysis of UK and German
# single-member district legislators (section 5.2 of the paper)

# It requires two datasets as inputs:

# 1. uk_mps_bjps.rds,  for UK legislators
# 2. deu_mps_bjps.rds, for German SMD legislators

#### Packages ####

library(sjPlot)
library(xtable)
library(patchwork)
library(tidyverse)

#### Descriptive Statistics ####

uk_mps <- readRDS("uk_mps_bjps.rds")
deu_mps <- readRDS("deu_mps_bjps.rds")

# generates and prints descriptive table for UK sample (table 6)

uk_table <- t(rbind.data.frame(
  uk_mps %>% dplyr::filter(elected_in != "byelection") %>%
    dplyr::group_by(elected_in) %>%
    dplyr::summarise(valid_bp = round(mean(!is.na(bp_constituency)),2),
                     mean_margin = round(mean(absolute_margin_last_election, na.rm = T),2),
                     median_margin = round(mean(absolute_margin_last_election, na.rm = T),2),
                     safe = round(mean(absolute_margin_last_election > 0.1, na.rm = T),2),
                     ultrasafe = round(mean(absolute_margin_last_election > 0.2, na.rm = T),2),
                     med_distance = round(median(distance_to_const, na.rm = T)/1000,2),
                     parachuters = round(mean(parachuter, na.rm = T),2)),
  
  uk_mps %>% dplyr::mutate(elected_in = "Overall") %>%
    dplyr::group_by(elected_in) %>%
    dplyr::summarise(valid_bp = round(mean(!is.na(bp_constituency)),2),
                     mean_margin = round(mean(absolute_margin_last_election, na.rm = T),2),
                     median_margin = round(mean(absolute_margin_last_election, na.rm = T),2),
                     safe = round(mean(absolute_margin_last_election > 0.1, na.rm = T),2),
                     ultrasafe = round(mean(absolute_margin_last_election > 0.2, na.rm = T),2),
                     med_distance = round(median(distance_to_const, na.rm = T)/1000,2),
                     parachuters = round(mean(parachuter, na.rm = T),2))))

xtable(uk_table)

# generates and prints descriptive table for DEU sample (table 7)

deu_table <- t(rbind.data.frame(
  deu_mps %>%
    dplyr::group_by(election) %>%
    dplyr::summarise(valid_bp = round(mean(!is.na(bp_constituency)),2),
                     mean_margin = round(mean(absolute_margin_last_election, na.rm = T),2),
                     median_margin = round(mean(absolute_margin_last_election, na.rm = T),2),
                     safe = round(mean(absolute_margin_last_election > 0.1, na.rm = T),2),
                     ultrasafe = round(mean(absolute_margin_last_election > 0.2, na.rm = T),2),
                     med_distance = round(median(distance_to_const, na.rm = T)/1000,2),
                     parachuters = round(mean(parachuter, na.rm = T),2)),
  
  deu_mps %>% dplyr::mutate(election = "Overall") %>%
    dplyr::group_by(election) %>%
    dplyr::summarise(valid_bp = round(mean(!is.na(bp_constituency)),2),
                     mean_margin = round(mean(absolute_margin_last_election, na.rm = T),2),
                     median_margin = round(mean(absolute_margin_last_election, na.rm = T),2),
                     safe = round(mean(absolute_margin_last_election > 0.1, na.rm = T),2),
                     ultrasafe = round(mean(absolute_margin_last_election > 0.2, na.rm = T),2),
                     med_distance = round(median(distance_to_const, na.rm = T)/1000,2),
                     parachuters = round(mean(parachuter, na.rm = T),2))))

xtable(deu_table)

#### Regression Models (main text and Supplementary Material) ####

model1_uk <- glm(data = subset(uk_mps, incumbent == 0), parachuter ~
                   party_margin + pty2 + elected_in + 
                   constituency_land_area, family = "binomial")

model2_uk <- glm(data = subset(uk_mps, incumbent == 0), parachuter ~
                   party_margin + pty2*elected_in + 
                   constituency_land_area, family = "binomial")

model3_uk <- glm(data = subset(uk_mps, incumbent == 0), parachuter ~
                   party_margin + pty2*elected_in + 
                   log(constituency_land_area), family = "binomial")

model4_uk <- glm(data = subset(uk_mps, incumbent == 0), parachuter ~
                   party_margin + pty2*elected_in + 
                   constituency_land_area + sex, family = "binomial")

model5_uk <- glm(data = subset(uk_mps, incumbent == 0), parachuter ~
                   party_margin + pty2*elected_in + 
                   constituency_land_area + Country, family = "binomial")

model1_deu <- glm(data = subset(deu_mps, incumbent == 0), parachuter ~
                    margin_last_election + 
                    pty2 + election + constituency_land_area, family = "binomial")

model2_deu <- glm(data = subset(deu_mps, incumbent == 0), parachuter ~
                    margin_last_election + 
                    pty2*election + constituency_land_area, family = "binomial")

model3_deu <- glm(data = subset(deu_mps, incumbent == 0), parachuter ~
                    margin_last_election + 
                    pty2*election + log(constituency_land_area), family = "binomial")

model4_deu <- glm(data = subset(deu_mps, incumbent == 0), parachuter ~
                    margin_last_election + 
                    pty2*election + constituency_land_area + sex, family = "binomial")

model5_deu <- glm(data = subset(deu_mps, incumbent == 0), parachuter ~
                    margin_last_election + 
                    pty2*election + constituency_land_area + east_west, family = "binomial")

# prints regression for UK sample (table 8)
stargazer(model1_uk, model2_uk, single.row = T, digits = 2)

# prints regression for DEU sample (table 9)
stargazer(model1_deu, model2_deu, single.row = T, digits = 2)

# robustness checks for UK sample (Supplementary Material, table S15)
stargazer(model3_uk, model4_uk, model5_uk, single.row = T, digits = 2)

# robustness checks for DEU sample (Supplementary Material, table S16)
stargazer(model3_deu, model4_deu, model5_deu, single.row = T, digits = 2)


#### Model Visualisation ####

# generates model plots (figure 6)

p1 <- plot_model(model1_uk, type = "pred", terms = c("party_margin [all]", "pty2[lab, con]")) + 
  theme_sjplot() + theme(plot.title = element_text(hjust = 0.5), legend.position = "none") + 
  aes(fill = group, colour = group) + 
  ylab("Predicted Probability") + xlab("Margin in the last election") + 
  ggtitle("Predicted probability of a newly \n elected British MP (2001-2019) \n being a parachuter (model 1)") + 
  scale_color_manual(labels = c("Lab", "Con"), values = c("black", "gray"), name = "Party") + 
  scale_fill_manual(labels = c("Lab", "Con"), values = c("black", "gray"), name = "Party")

p2 <- plot_model(model2_uk, type = "pred", terms = c("party_margin [all]", "pty2[lab, con]")) + 
  theme_sjplot() + theme(plot.title = element_text(hjust = 0.5)) + 
  aes(fill = group, colour = group) + 
  ylab("Predicted Probability") + xlab("Margin in the last election") + 
  ggtitle("Predicted probability of a newly \n elected British MP (2001-2019) \n being a parachuter (model 2)") + 
  scale_color_manual(labels = c("Lab", "Con"), values = c("black", "gray"), name = "Party") + 
  scale_fill_manual(labels = c("Lab", "Con"), values = c("black", "gray"), name = "Party") 

p3 <- plot_model(model1_deu, type = "pred", terms = c("margin_last_election [all]", "pty2[SPD, CDU/CSU]")) + 
  theme_sjplot() + theme(plot.title = element_text(hjust = 0.5), legend.position = "none") + 
  aes(fill = group, colour = group) + 
  ylab("Predicted Probability") + xlab("Margin in the last election") + 
  ggtitle("Predicted probability of a newly \n elected German SM district MP (1998-2017) \n being a parachuter (model 1)") + 
  scale_color_manual(labels = c("SPD", "CDU/CSU"), values = c("black", "gray"), name = "Party") + 
  scale_fill_manual(labels = c("SPD", "CDU/CSU"), values = c("black", "gray"), name = "Party")

p4 <- plot_model(model2_deu, type = "pred", terms = c("margin_last_election [all]", "pty2[SPD, CDU/CSU]")) + 
  theme_sjplot() + theme(plot.title = element_text(hjust = 0.5)) + 
  aes(fill = group, colour = group) + 
  ylab("Predicted Probability") + xlab("Margin in the last election") + 
  ggtitle("Predicted probability of a newly \n elected German SM district MP (1998-2017) \n being a parachuter (model 2)") + 
  scale_color_manual(labels = c("SPD", "CDU/CSU"), values = c("black", "gray"), name = "Party") + 
  scale_fill_manual(labels = c("SPD", "CDU/CSU"), values = c("black", "gray"), name = "Party")

(p1 | p2) /
  (p3 | p4)

ggsave("modelplots_casestudies.jpg", width = 10.5, height = 10.5)
